
#Convenience Functions and Data Processing

library(dplyr)
library(ggplot2)
library(stargazer)

#Locate the project folder: use the ~/Library/CloudStorage/Dropbox location when that folder exists,
#otherwise fall back to ~/Dropbox.
#dir.exists() tests the folder directly; the path is checked without its trailing slash.
if(dir.exists("~/Library/CloudStorage/Dropbox/Robots and Trade in Services")){
  base.path<-"~/Library/CloudStorage/Dropbox/Robots and Trade in Services/"
} else {
  base.path<-"~/Dropbox/Robots and Trade in Services/"
}

######################
## DEFINE FUNCTIONS ##
######################

#Robust errors
#Returns the vector of standard errors for a fitted model:
#  - "rq" fits: standard deviation of the bootstrap draws from quantreg::boot.rq();
#    extra arguments in ... (e.g. R) are forwarded to boot.rq()
#  - any other model: square root of the diagonal of sandwich::vcovHC(type="HC1"); ... is not used
robust_se<-function(mdl,...){
  #inherits() instead of class(mdl)=="rq": models such as glm carry more than one class,
  #and a condition of length > 1 inside if() is an error in R >= 4.2
  if(inherits(mdl,"rq")){
    #Bootstrapped standard errors for quantile regressions
    #NOTE(review): relies on the fit carrying $x and $y - confirm the rq() calls keep them
    boot.draws<-quantreg::boot.rq(x=mdl$x,y=mdl$y,tau=mdl$tau,...)$B
    sqrt(diag(cov(boot.draws)))
  } else {
    sqrt(diag(sandwich::vcovHC(mdl,type="HC1")))
  }
}

#Convenience function for showing results
#Renders one or more fitted models as a stargazer table.
#  mdl      a single fitted model or a list of fitted models
#  se_type  a list of length 1 (recycled across all models) or of length(mdl); an entry equal to
#           "robust" requests robust_se(), any other entry leaves stargazer's default standard errors
#  type     passed to stargazer as type
#  R_input  passed to robust_se() as R
#  ...      forwarded to stargazer::stargazer()
#Stops if se_type is not a list, or if its length is neither 1 nor length(mdl).
show_results<-function(mdl,se_type=list("robust"),type="latex",R_input=1000,...){
  #Ensure mdl is always a list object
  if(class(mdl)[1]!="list"){
    mdl<-list(mdl)
  }

  #inherits() keeps the check a single TRUE/FALSE even if se_type carries several classes
  if(!inherits(se_type,"list")){
    stop("se_type must be a list")
  }

  #Ensure se_type list length is either 1 or equal to the number of models
  if(length(se_type)!=length(mdl)){
    if(length(se_type)==1){
      se_type<-rep(se_type,length(mdl))
    } else {
      stop("se_type must either be length 1 or be of length mdl")
    }
  }

  #One entry per model: robust standard errors where requested, NULL otherwise
  #seq_along() rather than 1:length(), which would yield c(1,0) for an empty list
  se.list<-lapply(seq_along(mdl),function(x){
    if(se_type[[x]]=="robust"){
      robust_se(mdl[[x]],R=R_input)
    } else {
      NULL
    }
  })

  stargazer::stargazer(mdl,se=se.list,type=type,header=FALSE,...)
}

###################
## Load Datasets ##
###################

#Codebook for translating the values into factors
lucid.codebook<-read.csv(
  paste0(base.path,"Data/lucid-demographic-codebook.csv"),
  stringsAsFactors=FALSE
) %>%
  #Light processing and combining some categories
  #Code -3105 and "Prefer not to answer" become missing
  mutate(demographic=ifelse(value==-3105 | demographic=="Prefer not to answer",NA,demographic)) %>%
  #Strip the "Asian *** " prefix from the labels
  mutate(demographic=gsub("Asian \\*\\*\\* ","",demographic)) %>%
  #Collapse ethnicity codes 3 and 10-15 into a single "Other" label
  mutate(demographic=ifelse(question=="ethnicity" & value %in% c(3,10:15),"Other",demographic)) %>%
  #Household income: keep the first number in the label, with thousands separators removed
  mutate(demographic=ifelse(question=="hhi",
                            gsub(",","",stringr::str_extract(demographic,"[0-9,]+")),
                            demographic)) %>%
  #A label reading "14999" is recoded to "0"
  mutate(demographic=ifelse(demographic=="14999","0",demographic))

#One codebook table per question
lucid.codebook.list<-split(lucid.codebook,lucid.codebook$question,drop = TRUE)
#Renaming as appropriate for merging later:
#for question q, `value` becomes column q and `demographic` becomes column q_str
#seq_along() rather than 1:length(), which would yield c(1,0) for an empty list
lucid.codebook.proc<-lapply(seq_along(lucid.codebook.list), function(x){
  question.name<-names(lucid.codebook.list)[x]
  lucid.codebook.list[[x]] %>%
    #mutate(value=as.character(value)) %>%
    rename(
      !!sym(question.name):=value,
      !!sym(paste0(question.name,"_str")):=demographic
    ) %>%
    select(-question)
})

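#This data frame translates between the occupations defined by Acemoglu and Autor and the SOC classifications.
#NOTE: the loading code below is commented out, but `acemoglu.autor` is still referenced by the occupation join in `lucid.proc.pre`, so it must already exist in the environment.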
#soc.occ<-read.csv(paste0(base.path,"Data/Autor and Acemoglu 2011 tab-05/crosswalk/soc-occ-2000.csv"),colClasses = "character")
#acemoglu.autor<-readstata13::read.dta13(paste0(base.path,"Data/Autor and Acemoglu 2011 task construction/onet-soc.dta")) %>%
#  mutate(onetsoccode=as.character(onetsoccode))

#Sep 25 sample: keep respondents who gave informed consent
lucid.1<-read.csv(
  paste0(base.path,"Data/Lucid Sep 23/Robots and Foreigners_September 25, 2020_13.21trimmed.csv"),
  stringsAsFactors=FALSE
) %>%
  filter(infconsent=="Yes - Start Session") %>%
  mutate(sample_date="Sep 25",zip=as.character(zip))

#Oct 28 sample: additionally require the honesty pledge and the expected attention-check answer
lucid.2<-read.csv(
  paste0(base.path,"Data/Lucid Oct 28/Robots and Foreigners_dl2020_10_29_trimmed.csv"),
  stringsAsFactors=FALSE
) %>%
  filter(
    infconsent=="Yes - Start Session",
    honestypledge=="Yes! I will read carefully.",
    attentioncheck=="Extremely interested,Very interested"
  ) %>%
  mutate(sample_date="Oct 28",zip=as.character(zip))

#Stack both samples; zip was made character in each, presumably so the column types agree - confirm
lucid<-bind_rows(lucid.1,lucid.2)

#Lookup table with one row per column of lucid: q_name is the column name, question_text is that column's value in row 1
#NOTE(review): lucid has already been filtered on infconsent, so row 1 is the first retained row - confirm it holds the question text
lucid.question.key<-lucid[1,] %>%
  t() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  rename(q_name=rowname,question_text=`1`)

#Main respondent-level processing: coerces columns to numeric, builds nationalism/party measures,
#parses the treatment arm, collapses the arm-specific outcome columns, derives outcome transformations,
#scores the manipulation checks, sums reading time, and joins occupation task measures.
lucid.proc.pre<-lucid %>% 
  #Coerce the listed columns to numeric
  #NOTE(review): nat1_1-nat3_1, political_party and the Q4_1/Q5_1/Q82_1-family columns used below are not in this list - confirm they are already numeric
  mutate_at(vars(age,LocationLongitude,LocationLatitude,Duration..in.seconds.,Q1_1,Q20_1,Q74_1,Q76_1,Q2_1,Q21_1,Q75_1,Q77_1,auto_dom_reg_1,Q23_1,Q78_1,Q81_1),as.numeric) %>%
  #nationalism_questions_agreed counts how many of the three nat items are >= 50 (0-3);
  #nationalism_average_response divides their sum by 300 (presumably three 0-100 items, giving a 0-1 scale - confirm)
  #party collapses the political_party codes into three labels; codes matching no rule (including NA) give NA
  #NOTE(review): the meaning of the individual political_party codes is not visible here - verify against the questionnaire
  mutate(nationalism_questions_agreed=(nat1_1>=50) + (nat2_1>=50) + (nat3_1>=50),
         nationalism_average_response=(nat1_1+nat2_1+nat3_1)/300,
         party=case_when(
           political_party<=3 | political_party==6 ~ "Democrat",
           political_party>=8 | political_party==5 ~ "Republican",
           political_party==4 | political_party==7 ~ "Other"
         )) %>%
  #Parse the treatment string into its two dimensions plus a nationalism flag.
  #Each outcome below takes the first non-missing value among four columns
  #(presumably one column per treatment arm - confirm)
  mutate(treatment_foreign_domestic=stringr::str_extract(treatment,"Foreign|Domestic"),
         treatment_automation_labor=stringr::str_extract(treatment,"Automation|Labor"),
         treatment_fdal=paste0(treatment_foreign_domestic," ",treatment_automation_labor),
         treatment_nationalism=grepl("Nationalism",treatment),
         benefits_to_unemployed=case_when(
           !is.na(Q1_1) ~ Q1_1,
           !is.na(Q20_1) ~ Q20_1,
           !is.na(Q74_1) ~ Q74_1,
           !is.na(Q76_1) ~ Q76_1
         ),
         restrict_imports=case_when(
           !is.na(Q2_1) ~ Q2_1,
           !is.na(Q21_1) ~ Q21_1,
           !is.na(Q75_1) ~ Q75_1,
           !is.na(Q77_1) ~ Q77_1
         ),
         restrict_automation=case_when(
           !is.na(auto_dom_reg_1) ~ auto_dom_reg_1,
           !is.na(Q23_1) ~ Q23_1,
           !is.na(Q78_1) ~ Q78_1,
           !is.na(Q81_1) ~ Q81_1
         ),
         restrict_immigration=case_when(
           !is.na(Q4_1) ~ Q4_1,
           !is.na(Q24_1) ~ Q24_1,
           !is.na(Q79_1) ~ Q79_1,
           !is.na(Q82_1.1) ~ Q82_1.1
         ),
         promote_unions=case_when(
           !is.na(Q5_1) ~ Q5_1,
           !is.na(Q25_1) ~ Q25_1,
           !is.na(Q80_1) ~ Q80_1,
           !is.na(Q83_1.1) ~ Q83_1.1
         ),
         quality_declines=case_when(
           !is.na(Q82_1) ~ Q82_1,
           !is.na(Q83_1) ~ Q83_1,
           !is.na(Q84_1) ~ Q84_1,
           !is.na(Q85_1) ~ Q85_1
         )
  ) %>%
  #Adds <outcome>_binned for the three outcomes: 0-4 by the cut points 20/40/60/80, NA when the outcome is missing
  mutate_at(vars(restrict_imports,restrict_automation,benefits_to_unemployed), .funs=list(binned=~case_when(
    . <= 20 ~ 0,
    .>20 & .<=40 ~ 1,
    .>40 & .<=60 ~ 2,
    .>60 & .<=80 ~ 3,
    .>80 ~ 4,
    TRUE ~ NA_real_
  ))) %>%
  #Post processing of outcomes
  #*_share: each outcome over the sum of all three; *_share_2: over imports + automation only
  #(these are NaN when the denominator is 0)
  #*_difference: outcome minus benefits_to_unemployed
  #distribution_index: mean of the two restriction outcomes; distribution_transfers_index adds 1 to
  #numerator and denominator (presumably to avoid dividing by zero - confirm)
  mutate(
    benefits_to_unemployed_share=benefits_to_unemployed/(restrict_imports+restrict_automation+benefits_to_unemployed),
    restrict_imports_share=restrict_imports/(restrict_imports+restrict_automation+benefits_to_unemployed),
    restrict_automation_share=restrict_automation/(restrict_imports+restrict_automation+benefits_to_unemployed),
    restrict_imports_share_2=restrict_imports/(restrict_imports+restrict_automation),
    restrict_automation_difference=restrict_automation-benefits_to_unemployed,
    restrict_imports_difference=restrict_imports-benefits_to_unemployed,
    restrict_automation_share_2=restrict_automation/(restrict_imports+restrict_automation),
    distribution_index=(restrict_imports+restrict_automation)/2,
    distribution_transfers_index=(distribution_index+1)/(benefits_to_unemployed+1),
    min_imports_automation=pmin(restrict_imports,restrict_automation)) %>%
  #Relevel factors
  #The first level listed is the reference level ("Domestic", "Labor", "Domestic Labor");
  #the *_num columns are 0/1 indicators for "Foreign" and "Automation"
  mutate(
    treatment_foreign_domestic=factor(treatment_foreign_domestic,levels=c("Domestic","Foreign")),
    treatment_foreign_domestic_num=as.numeric(treatment_foreign_domestic=="Foreign"),
    treatment_automation_labor=factor(treatment_automation_labor,levels=c("Labor","Automation")),
    treatment_automation_labor_num=as.numeric(treatment_automation_labor=="Automation"),
    treatment_fdal=factor(treatment_fdal,levels=c("Domestic Labor","Domestic Automation","Foreign Labor","Foreign Automation"))) %>%
  #Process manipulation checks
  #manip2 answers are mapped to the treatment arm they describe; any other answer gives NA.
  #The *_pass flags compare each answer with the assigned treatment. Because NA propagates,
  #checks_passed and passes_all_checks are NA (not FALSE) when any component is NA.
  mutate(manipulation_check_treatment=case_when(
    manip2=="Competition from foreign workers." ~ "Foreign Labor",
    manip2=="Competition from workers in other parts of the United States." ~ "Domestic Labor",
    manip2=="Workers replaced by automation developed by foreign companies." ~ "Foreign Automation",
    manip2=="Workers replaced by automation developed by US companies." ~ "Domestic Automation",
    TRUE ~ NA_character_
  ),
  manip1_pass=manip1=="Automobiles/trucks",
  manip2_fd_pass=treatment_foreign_domestic==stringr::str_extract(manipulation_check_treatment,"Domestic|Foreign"),
  manip2_al_pass=treatment_automation_labor==stringr::str_extract(manipulation_check_treatment,"Automation|Labor"),
  manip2_pass=manip2_al_pass==TRUE & manip2_fd_pass==TRUE,
  manip3_pass=manip3=="Tariffs on imports.",
  checks_passed=manip1_pass+manip2_fd_pass+manip2_al_pass+manip3_pass,
  passes_all_checks=checks_passed==4) %>%
  #Process time reading
  #time_reading sums every column whose name contains "Page.Submit", treating missing values as 0
  mutate_at(vars(contains("Page.Submit")),as.numeric) %>%
  mutate(time_reading=rowSums(select(.,contains("Page.Submit")),na.rm=TRUE)) %>%
  #Miscellaneous
  #Code -3105 in hhi and education is set to missing
  mutate(hhi=ifelse(hhi==-3105,NA,hhi),
         education=ifelse(education==-3105,NA,education)) %>%
  #Dropping these observations here to simplify the analysis
  #NOTE(review): both samples were already filtered to infconsent=="Yes - Start Session" at load, so this looks redundant - confirm
  filter(infconsent!="No") %>%
  #OCCUPATIONS
  #First, build the ONET SOC occupation code
  #soc is the first seven characters of occupation with hyphens removed
  mutate(soc=gsub("-","",substr(occupation,1,7))) %>%
  #NOTE(review): the only definition of acemoglu.autor visible in this file is commented out;
  #it must already exist in the environment or this join fails with "object not found"
  left_join(acemoglu.autor,by=c("soc"="onetsoccode")) %>%
  #Row-wise maximum of r_man, r_cog and offshor (presumably columns of acemoglu.autor - confirm); NA if any of the three is NA
  mutate(max_threat=pmax(r_man,r_cog,offshor))

#Attach the codebook labels: starting from lucid.proc.pre, each codebook table is left-joined in turn
#(no `by` is given, so dplyr joins on the columns the two tables share)
lucid.proc.full<-purrr::reduce(lucid.codebook.proc,left_join,.init=lucid.proc.pre) %>%
  mutate_at(vars(ethnicity,hispanic,region),as.factor) %>%
  #Numeric versions of coded/labelled demographics
  mutate(
    ethnicity_num=as.numeric(as.character(ethnicity)),
    hhi_num=as.numeric(hhi_str),
    male=gender_str=="Male"
  ) %>%
  #Race/ethnicity indicators built from the numeric codes
  mutate(
    white=ethnicity_num==1,
    black=ethnicity_num==2,
    aapi=ethnicity_num>=4 & ethnicity_num<=14,
    hispanicbin=hispanic!=1 & hispanic!=15
  ) %>%
  #Education indicators built from the education code
  mutate(
    educ_nodegree=education<=4,
    educ_ba=education==6,
    educ_advdegree=education == 7 | education == 8
  )

#Analysis sample: respondents whose summed Page.Submit timers reach at least 30
#NOTE(review): presumably seconds - confirm the timer units
lucid.proc<-filter(lucid.proc.full,time_reading>=30)

# 7 point ideology score
#The position of each Q43 label in the lookup vector is its score (1 = "Extremely liberal", 7 = "Extremely conservative").
#match() returns NA for a missing or unlisted answer, so no -99 sentinel is needed;
#as.numeric() keeps the column a double, as before.
lucid.proc$ideology7pt <- as.numeric(match(lucid.proc$Q43,
                                           c("Extremely liberal",
                                             "Liberal",
                                             "Slightly liberal",
                                             "Moderate",
                                             "Slightly conservative",
                                             "Conservative",
                                             "Extremely conservative")))
#naniar is no longer needed for the recode above; the library call is kept in case later code relies on it
library(naniar)

# Party dummies
#1 if party equals the label, 0 otherwise, NA where party is missing
lucid.proc$party_dem <- as.numeric(lucid.proc$party=="Democrat")
lucid.proc$party_rep <- as.numeric(lucid.proc$party=="Republican")
lucid.proc$party_other <- as.numeric(lucid.proc$party=="Other")



#This line defines the standard control variables, which are also used later in the analysis
#("hispanicbin" was previously listed twice; each control now appears exactly once)
balance_controls_vec<-c("educ_nodegree","educ_ba","educ_advdegree","male","hispanicbin","region","white","black","hhi_num","party_dem","party_rep")


#Plots the distribution of one outcome by foreign/domestic treatment within one automation/labor arm.
#  subsample   value of treatment_automation_labor to keep
#  depvar      name (string) of the outcome column in lucid.proc
#  return_df   if TRUE, return the filtered data (with the group mean in val_mean) instead of a plot
#  graph_type  only the first element is used: "cdf" draws an empirical CDF, anything else a density (bw=10)
#Reads the global data frame lucid.proc.
plotter<-function(subsample,depvar,return_df=FALSE,graph_type=c("pdf","cdf")){
  depvar.sym<-sym(depvar)

  #Keep the requested arm and attach the mean of depvar within each foreign/domestic group
  for.plotting<-lucid.proc %>%
    filter(treatment_automation_labor==subsample) %>%
    group_by(treatment_foreign_domestic) %>%
    mutate(val_mean=mean(!!depvar.sym,na.rm=TRUE)) %>%
    ungroup()

  if(return_df==TRUE){
    return(for.plotting)
  }

  #Base plot: a vertical line at each group mean, one colour per group, no legend
  axis.label<-stringr::str_to_title(stringr::str_replace_all(depvar,"_"," "))
  plt<-ggplot(for.plotting,aes(!!depvar.sym,color=treatment_foreign_domestic))+
    geom_vline(aes(xintercept=val_mean,color=treatment_foreign_domestic))+
    scale_color_manual(values=c("Domestic"="#6699CC","Foreign"="#004488"))+
    theme_bw()+
    theme(legend.position="none")+
    labs(x=axis.label)

  #Add the distribution layer selected by the first element of graph_type
  distribution.layer<-if(graph_type[1]=="cdf"){
    stat_ecdf()
  } else {
    geom_density(bw=10)
  }
  plt+distribution.layer
}

